#!coding=utf-8

import settings
from django.core.management.base import BaseCommand, CommandError
from crawl.models import Videos, Category
import urllib2, re
from BeautifulSoup import BeautifulSoup
from datetime import datetime

class Command(BaseCommand):
    """Management command that scrapes video entries from www.511zy.in.

    For every configured category it walks the listing pages, follows each
    listed entry to its detail page, extracts title / picture / play URLs
    and stores one ``Videos`` row per entry.

    Failures are reported with ``print`` and skipped (best effort): a failed
    listing page skips that page only, a failed detail page or save skips
    that single video only.
    """

    help = 'collect 880yan'
    __base_url = 'http://www.511zy.in'
    __page_url_template = 'http://www.511zy.in/vodlist/{!category!}_{!page!}.htm'
    # Categories to crawl. 'cid' is substituted into the listing URL, 'tid' and
    # 'type' are copied onto every stored video, 'start'/'end' bound the page
    # numbers passed to range() in handle().
    __cate_range = [
        {'cid': '35', 'tid':'9', 'type':'艳舞写真', 'actor':'', 'director':'', 'description': '', 'start':1, 'end':25},
    ]
    # Full category list, currently disabled (kept for reference):
    # __cate_range = [
    # {'cid': '27', 'tid':'1', 'type':'亚洲情色', 'actor':'', 'director':'', 'description': '', 'start':1, 'end':239},
    # {'cid': '28', 'tid':'2', 'type':'欧美性爱', 'actor':'', 'director':'', 'description': '', 'start':1, 'end':87},
    # {'cid': '38', 'tid':'3', 'type':'国产情色', 'actor':'', 'director':'', 'description': '', 'start':1, 'end':31},
    # {'cid': '30', 'tid':'4', 'type':'经典三级', 'actor':'', 'director':'', 'description': '', 'start':1, 'end':37},
    # {'cid': '29', 'tid':'5', 'type':'成人动漫', 'actor':'', 'director':'', 'description': '', 'start':1, 'end':20},
    # {'cid': '31', 'tid':'6', 'type':'强奸乱伦', 'actor':'', 'director':'', 'description': '', 'start':1, 'end':16},
    # {'cid': '33', 'tid':'7', 'type':'制服丝袜', 'actor':'', 'director':'', 'description': '', 'start':1, 'end':99},
    # {'cid': '32', 'tid':'8', 'type':'变态另类', 'actor':'', 'director':'', 'description': '', 'start':1, 'end':8},
    # {'cid': '35', 'tid':'9', 'type':'艳舞写真', 'actor':'', 'director':'', 'description': '', 'start':1, 'end':25},
    # {'cid': '37', 'tid':'10', 'type':'人与动物', 'actor':'', 'director':'', 'description': '', 'start':1, 'end':2},
    # {'cid': '34', 'tid':'11', 'type':'淫声系列', 'actor':'', 'director':'', 'description': '', 'start':1, 'end':6},
    # ]

    def handle(self, *args, **options):
        """Crawl every configured category page and save the videos found.

        Positional and keyword arguments are accepted for the management
        command interface but are not used.
        """
        for category in self.__cate_range:
            # NOTE(review): range() excludes category['end'], so the page
            # numbered 'end' is never fetched -- confirm whether 'end' is
            # meant to be the last page (inclusive) or one past it.
            for page in range(category['start'], category['end']):
                try:
                    entries = self._list_entries(category['cid'], page)
                except Exception as e:
                    # Best effort: a broken/unreachable listing page must not
                    # abort the rest of the crawl.
                    print(e)
                    continue

                for entry in entries:
                    try:
                        values = self._build_values(category, entry)
                        Videos(**values).save()
                    except Exception as e:
                        # Skip only this video; keep going with the others on
                        # the same listing page.
                        print(e)
                        continue

    def _fetch_html(self, url):
        """Download ``url`` and return its body re-encoded as UTF-8 bytes.

        The body is decoded as gb2312 with undecodable bytes dropped. The
        response is always closed, even when reading fails.
        """
        response = urllib2.urlopen(url)
        try:
            raw = response.read()
        finally:
            response.close()
        return raw.decode('gb2312', 'ignore').encode('utf-8')

    def _list_entries(self, cid, page):
        """Return the ``<li>`` tags of one listing page of category ``cid``."""
        url = self.__page_url_template.replace('{!category!}', str(cid))
        url = url.replace('{!page!}', str(page))
        listing = BeautifulSoup(self._fetch_html(url))
        # The first <li> is skipped -- presumably a header row; verify against
        # the site markup.
        return listing.find('div', {'class': 'leftcontent'}).findAll('li')[1:]

    def _build_values(self, category, entry):
        """Fetch the detail page of ``entry`` and return ``Videos`` field values.

        ``category`` is one dict from the category list; ``entry`` is one
        ``<li>`` tag from a listing page. Raises whatever the download or the
        tag lookups raise when the page is unreachable or lacks the expected
        markup.
        """
        link = entry.find('div', {'class': 'list1'}).find('a')
        content_url = '%s%s' % (self.__base_url, link['href'])
        detail = BeautifulSoup(self._fetch_html(content_url))

        cover = detail.find('div', {'class': 'vpic'}).find('img')
        inputs = detail.find('div', {'class': 'vpl'}).find('ul').findAll('input')

        return {
            'tid': category['tid'],
            'title': cover['alt'],
            'type': category['type'],
            'pic': '%s%s' % (self.__base_url, cover['src']),
            'actor': category['actor'],
            'director': category['director'],
            'description': category['description'],
            'reurl': content_url,
            # All play URLs of the video, joined into one '|'-separated string.
            'play_url': '|'.join([field['value'] for field in inputs]),
            'datetime': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
        }

